{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "client = OpenAI()\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.metrics import roc_curve, auc\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "def get_prob(api_response, target_class):\n",
    "\n",
    "    # Extract the top log probabilities\n",
    "    top_logprobs = api_response.choices[0].logprobs.content[0].top_logprobs\n",
    "    \n",
    "    # Convert the log probabilities to probabilities for the specified target_class\n",
    "    prob = [np.exp(x.logprob) for x in top_logprobs if x.token == target_class]\n",
    "\n",
    "    # Check if the target class was found in the top_logprobs\n",
    "    # If not, set the result (res) to 0, indicating the target class has a probability of 0.\n",
    "    # Otherwise, set res to the first (and presumably only) probability found for target_class.\n",
    "    if len(prob) == 0:\n",
    "        res = 0\n",
    "    else:\n",
    "        res = prob[0]\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the review dataset; the columns used in this notebook are 'Positive' and 'reviewText'.\n",
    "df = pd.read_csv('https://raw.githubusercontent.com/pycaret/pycaret/master/datasets/amazon.csv')\n",
    "\n",
    "# Balanced evaluation sample: 100 rows per label, drawn with a fixed seed.\n",
    "df_0 = df.loc[df['Positive'] == 0].sample(n=100, random_state=42)\n",
    "df_1 = df.loc[df['Positive'] == 1].sample(n=100, random_state=42)\n",
    "\n",
    "# ignore_index=True renumbers the combined rows 0..199.\n",
    "df = pd.concat([df_0, df_1], ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the balanced sample assembled in the previous cell.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt that restricts the reply to the single word 'positive' or 'negative'.\n",
    "system_prompt = \"\"\"You are an expert in sentiment analysis. You will receive a text that you have to classify. \n",
    "- if the text is positive, then return 'positive'\n",
    "- if the text is negative, then return 'negative' \n",
    "Return only 'positive' or 'negative'.\n",
    "The output should have 8 characters and all in lowercase. No other values are allowed!\n",
    "\"\"\"\n",
    "\n",
    "# One-off request with a sample sentence to inspect the response format.\n",
    "demo_messages = [\n",
    "    {\"role\": \"system\", \"content\": system_prompt},\n",
    "    {\"role\": \"user\", \"content\": \"Today I have dinner plans.\"},\n",
    "]\n",
    "api_response = client.chat.completions.create(\n",
    "    model='gpt-3.5-turbo',\n",
    "    messages=demo_messages,\n",
    "    temperature=0,\n",
    "    logprobs=True,\n",
    "    top_logprobs=5,  # ask for five candidate tokens at each output position\n",
    ")\n",
    "\n",
    "# Candidate tokens (with log probabilities) for the first token of the reply.\n",
    "api_response.choices[0].logprobs.content[0].top_logprobs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw first-token probabilities of the two labels in the demo response.\n",
    "prob_positive, prob_negative = (\n",
    "    get_prob(api_response, label) for label in ('positive', 'negative')\n",
    ")\n",
    "\n",
    "# Rescale so the two values sum to 1.\n",
    "sum_prob = prob_positive + prob_negative\n",
    "prob_positive, prob_negative = prob_positive / sum_prob, prob_negative / sum_prob\n",
    "\n",
    "print(f\"{prob_positive=}\")\n",
    "print(f\"{prob_negative=}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Classify every sampled review with GPT-3.5 Turbo and GPT-4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gpt_sentimental_classif(text, model):\n",
    "    \n",
    "    # Define the system prompt that instructs the model on how to analyze sentiment.\n",
    "    system_prompt = \"\"\"You are an expert in sentiment analysis. You will receive a text that you have to classify. \n",
    "    - if the text is positive, then return 'positive'\n",
    "    - if the text is negative, then return 'negative' \n",
    "    Return only 'positive' or 'negative'.\n",
    "    The output should have 8 characters and all in lowercase. No other values are allowed!\n",
    "    \"\"\"\n",
    "\n",
    "    # Request the model to classify the sentiment of the provided text.\n",
    "    api_response = client.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": system_prompt},\n",
    "            {\"role\": \"user\", \"content\":text}\n",
    "            ],\n",
    "        temperature=0,  # Ensure deterministic output for the sentiment analysis\n",
    "        logprobs=True,  # Request log probabilities for better accuracy in classification\n",
    "        top_logprobs=5  # Limit the number of top log probabilities returned\n",
    "    )\n",
    "\n",
    "    # Calculate the probabilities of the text being positive or negative.\n",
    "    prob_positive = get_prob(api_response, 'positive')\n",
    "    prob_negative = get_prob(api_response, 'negative')\n",
    "\n",
    "    # Normalize the probabilities so that they sum up to 1.\n",
    "    sum_prob = prob_positive + prob_negative\n",
    "    prob_positive = prob_positive/sum_prob\n",
    "    prob_negative = prob_negative/sum_prob\n",
    "\n",
    "    # Return the normalized probabilities.\n",
    "    return prob_positive, prob_negative\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os \n",
    "import pickle\n",
    "\n",
    "def make_exp(model, target_file):\n",
    "    \n",
    "    # Check if results file already exists (to avoid redo & to reduce unnecessary costs).\n",
    "    if os.path.exists(target_file):\n",
    "        # Load existing results if file is present.\n",
    "        with open(target_file, 'rb') as file:\n",
    "            res = pickle.load(file)\n",
    "    else:\n",
    "        # Initialize results list and perform sentiment analysis on each review.\n",
    "        res = []\n",
    "        for i in range(len(df)):\n",
    "            res.append(gpt_sentimental_classif(df.loc[i,'reviewText'], model))\n",
    "        # Save the new results to file.\n",
    "        with open(target_file, 'wb') as file:\n",
    "            pickle.dump(res, file)\n",
    "    \n",
    "    # Convert results list to DataFrame.\n",
    "    res = pd.DataFrame(res, columns=['prob_positive', 'prob_negative'])\n",
    "    return res\n",
    "\n",
    "# Score the sample with each model; make_exp loads the pickle file instead of calling the API when it already exists.\n",
    "probabilities_gpt_3 = make_exp('gpt-3.5-turbo', 'probabilities_gpt_3.pkl')\n",
    "probabilities_gpt_4 = make_exp('gpt-4', 'probabilities_gpt_4.pkl')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probabilities returned by make_exp for 'gpt-3.5-turbo'.\n",
    "probabilities_gpt_3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Probabilities returned by make_exp for 'gpt-4'.\n",
    "probabilities_gpt_4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ROC curve and area under it per model, scored with the predicted probability of the positive class.\n",
    "fpr_3, tpr_3, _ = roc_curve(y_true=df['Positive'], y_score=probabilities_gpt_3['prob_positive'])\n",
    "roc_auc_3 = auc(x=fpr_3, y=tpr_3)\n",
    "\n",
    "fpr_4, tpr_4, _ = roc_curve(y_true=df['Positive'], y_score=probabilities_gpt_4['prob_positive'])\n",
    "roc_auc_4 = auc(x=fpr_4, y=tpr_4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "\n",
    "# Model curves; each legend entry reports the AUC with four decimals.\n",
    "plt.plot(fpr_3, tpr_3, color='orange', linewidth=2,\n",
    "         label=f'ROC curve GPT-3.5 Turbo (area = {roc_auc_3:.4f})')\n",
    "plt.plot(fpr_4, tpr_4, color='blue', linewidth=2,\n",
    "         label=f'ROC curve GPT-4 (area = {roc_auc_4:.4f})')\n",
    "\n",
    "# Dashed diagonal reference line from (0, 0) to (1, 1).\n",
    "plt.plot([0, 1], [0, 1], color='navy', linewidth=2, linestyle='--')\n",
    "\n",
    "plt.xlim(0.0, 1.0)\n",
    "plt.ylim(0.0, 1.05)\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.title('Receiver Operating Characteristic (ROC)')\n",
    "plt.legend(loc='lower right')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict the positive class when its normalized probability is at least 0.5.\n",
    "predictions_gpt_3 = probabilities_gpt_3['prob_positive'].ge(0.5).astype('int64')\n",
    "predictions_gpt_4 = probabilities_gpt_4['prob_positive'].ge(0.5).astype('int64')\n",
    "\n",
    "# Accuracy is the share of predictions equal to the 'Positive' column of df.\n",
    "accuracy_gpt_3 = (predictions_gpt_3 == df['Positive']).mean()\n",
    "accuracy_gpt_4 = (predictions_gpt_4 == df['Positive']).mean()\n",
    "\n",
    "print(f'Accuracy of GPT-3 model: {accuracy_gpt_3}')\n",
    "print(f'Accuracy of GPT-4 model: {accuracy_gpt_4}')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
